from typing import List

from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
from langchain_text_splitters import CharacterTextSplitter


def test(
    file_path: str = "d:\\data.txt",
    *,
    encoding: str = "utf-8",
    chunk_size: int = 1000,
    chunk_overlap: int = 0,
) -> None:
    """Load a text file, split it into chunks, and print every chunk.

    All arguments default to the values this demo originally hard-coded, so
    calling ``test()`` with no arguments behaves exactly as before.

    Args:
        file_path: Path of the text file handed to ``TextLoader``.
        encoding: Text encoding handed to ``TextLoader``.
        chunk_size: ``chunk_size`` forwarded to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` forwarded to
            ``CharacterTextSplitter``.
    """
    # Load the document.
    loader = TextLoader(file_path, encoding=encoding)
    documents = loader.load()

    # Split the loaded document into chunks.
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs: List[Document] = text_splitter.split_documents(documents)

    # Print each chunk behind a visible marker so chunk boundaries stand out.
    for doc in docs:
        print('>>>>>> ', doc.page_content)
